############################################################################
# SCIENCE ARTICLE
# REPLICATION CODE, CHOROPLETH MAPS
# JULY 15, 2016
############################################################################
ipak <- function(pkg){
    new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
    if (length(new.pkg)) 
        install.packages(new.pkg, dependencies = TRUE)
    sapply(pkg, require, character.only = TRUE)
}

# usage
packages <- c("maptools", "mapproj", "rgeos", "rgdal", "ggplot2", "RColorBrewer","stringr","scales","foreign")
ipak(packages)

library(maptools)
library(mapproj)
library(rgeos)
library(rgdal)
library(ggplot2)
library(RColorBrewer)
library(stringr)
library(scales)
library(foreign)
#library(ineq)

#========================================================
# Step 1: Set up the base map of the United States
#========================================================

theme_map <- function(base_size=9, base_family="") {
    require(grid)
    theme_bw(base_size=base_size, base_family=base_family) %+replace%
    theme(axis.line=element_blank(),
          axis.text=element_blank(),
          axis.ticks=element_blank(),
          axis.title=element_blank(),
          panel.background=element_blank(),
          panel.border=element_blank(),
          panel.grid=element_blank(),
          panel.margin=unit(0, "lines"),
          plot.background=element_blank(),
          legend.justification = c(0,0),
          legend.position = c(0,0)
          )
}

## Read U.S. counties moderately-simplified GeoJSON file
us.counties <- readOGR(dsn="gz_2010_us_050_00_5m.json",
                       layer="OGRGeoJSON")

# Convert it to Albers equal area
us.counties.aea <- spTransform(us.counties,
                               CRS("+proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +a=6370997 +b=6370997 +units=m +no_defs"))

us.counties.aea@data$id <- rownames(us.counties.aea@data)

# Extract, then rotate, shrink & move alaska (and reset projection)
# need to use state IDs via # https://www.census.gov/geo/reference/ansi_statetables.html
alaska <- us.counties.aea[us.counties.aea$STATE=="02",]
alaska <- elide(alaska, rotate=-50)
alaska <- elide(alaska, scale=max(apply(bbox(alaska), 1, diff)) / 2.3)
alaska <- elide(alaska, shift=c(-2100000, -2500000))
proj4string(alaska) <- proj4string(us.counties.aea)

# extract, then rotate & shift hawaii
hawaii <- us.counties.aea[us.counties.aea$STATE=="15",]
hawaii <- elide(hawaii, rotate=-35)
hawaii <- elide(hawaii, shift=c(5400000, -1400000))
proj4string(hawaii) <- proj4string(us.counties.aea)

# remove old states and put new ones back in; note the different order
# we're also removing puerto rico 
us.counties.aea <- us.counties.aea[!us.counties.aea$STATE %in% c("02", "15", "72"),]
us.counties.aea <- rbind(us.counties.aea, alaska, hawaii)

#========================================================
# Step 2: Merge county-level dataset with base map
#========================================================

state.data <- read.csv("state-data-statabs-2012.csv", header=TRUE)

county.names <- read.csv("fips-by-state.csv", header=TRUE)
county.names$fips  <- as.character(county.names$fips)
ind <- county.names$fips<10000
county.names$id[ind] <- paste("0", county.names$id[ind], sep="")
county.names$id[county.names$id=="00"] <- "0"

county.data <- read.csv("maps_matched.csv", header=TRUE, sep=",")
colnames(county.data) = c("fips", "amount", "num_patents", "num_citations", "num_campaigns","num_successful","successful_100","vc_count","vc_amount", "total_ks_amount", "total_vc_amount", "cf_patents", "vc_patents", "cf_cites", "vc_cites","prop")
county.data = merge(county.names, county.data, by.x="fips", by.y="fips", all.x=TRUE)
county.data$fips <- as.numeric(county.data$fips)
ind <- county.data$fips<10000
county.data$id <- as.character(county.data$fips)
county.data$id[ind] <- paste("0", county.data$id[ind], sep="")
county.data$id[county.data$id=="00"] <- "0"

# replace NA data with zeros for counties with no capital
county.data$prop <- ifelse(county.data$prop==0, NA, county.data$prop)

ind <- match(county.data$fips, county.names$fips)
county.data$name <- county.names$name[ind]
county.data$state <- county.names$state[ind]

ind <- match(state.data$State.Abbr, county.data$state)
county.data$state[ind] <- state.data$State.Abbr

## Add state names as levels of county name, so states have FIPS too
levels(county.data$name) <- c(levels(county.data$name), levels(state.data$State))
county.data$name[ind] <- state.data$State

### Add census region. Don't call the variable "region" because that's
### already reserved by the map object
ind <- match(county.data$state, state.data$State.Abbr)
county.data$census.region <- state.data$Region[ind]

library(Hmisc)
county.data$prop <- cut2(county.data$prop,
#"KS=0, VC>0","0 < KS/VC < 0.5", "0.5 < KS/VC < 0.9","0.9 < KS/VC < 1.1", "1.1 < KS/VC < 2 ", "2 < KS/VC", "KS>0, VC=0"
                             cuts = c(0,.5, 1, 1.1, 2, 1060, 9800, 9991, 9999, 10000))     
co.map <- fortify(us.counties.aea, region="GEO_ID")
co.map$id <- str_replace(co.map$id, "0500000US", "")

co.map <- merge(co.map, county.data, by="id")

#========================================================
# Step 3: Generate the choropleth maps
#========================================================
            
#Proportion Kickstarter to Venture Capital-----------------------------
p <- ggplot(data=co.map, aes(x=long, y=lat, group=group))

p1 <- p + geom_map(data=co.map,
                   map = co.map,
                   aes(map_id=id,
                       x=long,
                       y=lat,
                       group=group,
                       fill=prop),
                   color="black",
                   size=0.2)
     
colors <- c("peachpuff", "lightsalmon", "chocolate1", "firebrick3", "darkred", "royalblue3", "darkolivegreen2")

p2 <- p1 + scale_fill_manual(values=colors, na.value="gray77", labels=c("0 < KS/VC < 0.5", "0.5 < KS/VC < 0.9","0.9 < KS/VC < 1.1","1.1 < KS/VC < 2 ", "KS/VC > 2", "KS>0, VC=0", "KS=0, VC>0"))
#p2 <- p1 + scale_fill_brewer(type="seq", palette="Oranges", na.value="grey79", labels=c("0 < Ratio < 0.5", "0.5 < Ratio < 0.9","0.9 < Ratio < 1.1","1.1 < Ratio < 2 ", "2 < Ratio", "KS=0, VC>0", "KS>0, VC=0"))
p2 <- p2 + coord_equal()
p2 <- p2 + theme_map()
p2 <- p2 + theme(legend.position=c(0.9,0),legend.text=element_text(size=7), legend.title=element_text(size=12)) + labs(fill="")+ guides(fill=guide_legend(reverse=F), colour=guide_legend(reverse=F))
p2 <- p2 + ggtitle("") #+ annotate("text", x = 0.6*max(co.map$long), y = 0.8*max(co.map$lat), size=5, color="black", label =paste("Gini=",gini_prop_m,"\nHHI=",hhi_prop_m))
p2

ggsave("Fig1.png",
       p2,
       height=8,
       width=8,
       dpi=300)

